home *** CD-ROM | disk | FTP | other *** search
-
#include <ctype.h>
#include <stdio.h>
#include <string.h>
-
/* Minimal boolean type used for the scanner's state flags. */
typedef int BOOL;
#define TRUE 1
#define FALSE 0

/* Only this many fgets() chunks (normally lines) are scanned for a title. */
#define MAX_SCAN_LINES 20

/*
 * Return TRUE when the string s begins with prefix, comparing letters
 * without regard to case, so "<TiTlE>" matches "<title>".
 * A string shorter than prefix never matches.
 */
static BOOL has_prefix_nocase(const char *s, const char *prefix)
{
    for (; *prefix; s++, prefix++) {
        if (tolower((unsigned char)*s) != tolower((unsigned char)*prefix))
            return FALSE;
    }
    return TRUE;
}

/*
 * Print the TITLE of an HTML document on a single line of stdout.
 *
 * Usage: <program> directory filename
 *
 * The file "directory/filename" (or just "filename" when directory is the
 * empty string) is opened and at most MAX_SCAN_LINES lines are scanned.
 * Text between <title> and </title> (any letter case) is echoed; tags
 * nested inside the title are skipped and each line break inside the
 * title is replaced by one space.
 *
 * If no complete title is found in the scanned lines, the bare filename
 * argument is printed instead.  Note that when a title was opened but not
 * closed, the text echoed so far precedes the filename on the same line.
 *
 * Exit status: 0 on success (including the filename fallback),
 *              1 on wrong usage,
 *              2 when the path is too long or the file cannot be opened.
 */
int main(int argc, char **argv)
{
    FILE *fp;
    char filename[256];
    char buffer[1024];
    char *p;
    size_t len;
    size_t need;
    BOOL found = FALSE;     /* inside <title> ... </title> */
    BOOL tag = FALSE;       /* inside some other tag, looking for '>' */
    BOOL eol;               /* current chunk ended with a line terminator */
    int lines = 0;

    if (argc != 3) {
        fprintf(stderr,
                "\n"
                "This program takes an HTML document and extracts to its stdout\n"
                "the TITLE of the document, all in one line.\n\n"
                "Usage:\n"
                "\t%s directory filename\n\n", argv[0]);
        return 1;
    }

    /* Build "directory/filename", refusing anything that would not fit. */
    need = strlen(argv[1]) + (*argv[1] ? 1 : 0) + strlen(argv[2]) + 1;
    if (need > sizeof filename) {
        fprintf(stderr, "%s: Path \"%s/%s\" is too long\n",
                argv[0], argv[1], argv[2]);
        return 2;
    }
    strcpy(filename, argv[1]);
    if (*filename)
        strcat(filename, "/");
    strcat(filename, argv[2]);

    if (!(fp = fopen(filename, "r"))) {
        fprintf(stderr, "%s: Unable to open file \"%s\"\n",
                argv[0], filename);
        return 2;
    }

    while (lines++ < MAX_SCAN_LINES &&
           fgets(buffer, sizeof buffer, fp) != NULL) {
        /*
         * Strip the line terminator (LF or CR LF) only when one is really
         * there; a chunk cut short by the buffer size or by end of file
         * keeps its last character.
         */
        len = strlen(buffer);
        eol = FALSE;
        if (len > 0 && buffer[len - 1] == '\n') {
            buffer[--len] = '\0';
            eol = TRUE;
            if (len > 0 && buffer[len - 1] == '\r')
                buffer[--len] = '\0';
        }

        p = buffer;
        while (*p) {
            if (tag) {
                /* Skip the rest of a tag we are not interested in. */
                p = strchr(p, '>');
                if (!p)
                    break;          /* tag continues in the next chunk */
                p++;
                tag = FALSE;
            }

            /* Consume text up to the next '<', echoing it inside a title. */
            while (*p && *p != '<') {
                if (found)
                    fputc(*p, stdout);
                p++;
            }

            if (!*p) {
                /* A line break inside the title becomes a single space. */
                if (found && eol)
                    fputc(' ', stdout);
                break;
            }

            if (!found && has_prefix_nocase(p, "<title>")) {
                p += 7;             /* strlen("<title>") */
                found = TRUE;
            }
            else if (found && has_prefix_nocase(p, "</title>")) {
                fclose(fp);
                fputc('\n', stdout);
                return 0;
            }
            else {
                /* Any other tag: p stays on '<' and is skipped next pass. */
                tag = TRUE;
            }
        } /* while stuff in buffer */
    } /* while not EOF and not very many lines read */

    /* No complete title among the first few lines: use the filename. */
    printf("%s\n", argv[2]);
    fclose(fp);
    return 0;
}
-
-